{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 读取数据\n",
    "## 特征选择与预处理\n",
    "## 特征规范化\n",
    "## 模型运用"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [],
   "source": [
    "#导包\n",
    "from sklearn.cluster import KMeans\n",
    "from sklearn import preprocessing\n",
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 读取数据"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>CustomerID</th>\n",
       "      <th>Gender</th>\n",
       "      <th>Age</th>\n",
       "      <th>Annual Income (k$)</th>\n",
       "      <th>Spending Score (1-100)</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>Male</td>\n",
       "      <td>19</td>\n",
       "      <td>15</td>\n",
       "      <td>39</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>Male</td>\n",
       "      <td>21</td>\n",
       "      <td>15</td>\n",
       "      <td>81</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>Female</td>\n",
       "      <td>20</td>\n",
       "      <td>16</td>\n",
       "      <td>6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>4</td>\n",
       "      <td>Female</td>\n",
       "      <td>23</td>\n",
       "      <td>16</td>\n",
       "      <td>77</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>5</td>\n",
       "      <td>Female</td>\n",
       "      <td>31</td>\n",
       "      <td>17</td>\n",
       "      <td>40</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>195</th>\n",
       "      <td>196</td>\n",
       "      <td>Female</td>\n",
       "      <td>35</td>\n",
       "      <td>120</td>\n",
       "      <td>79</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>196</th>\n",
       "      <td>197</td>\n",
       "      <td>Female</td>\n",
       "      <td>45</td>\n",
       "      <td>126</td>\n",
       "      <td>28</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>197</th>\n",
       "      <td>198</td>\n",
       "      <td>Male</td>\n",
       "      <td>32</td>\n",
       "      <td>126</td>\n",
       "      <td>74</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>198</th>\n",
       "      <td>199</td>\n",
       "      <td>Male</td>\n",
       "      <td>32</td>\n",
       "      <td>137</td>\n",
       "      <td>18</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>199</th>\n",
       "      <td>200</td>\n",
       "      <td>Male</td>\n",
       "      <td>30</td>\n",
       "      <td>137</td>\n",
       "      <td>83</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>200 rows × 5 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "     CustomerID  Gender  Age  Annual Income (k$)  Spending Score (1-100)\n",
       "0             1    Male   19                  15                      39\n",
       "1             2    Male   21                  15                      81\n",
       "2             3  Female   20                  16                       6\n",
       "3             4  Female   23                  16                      77\n",
       "4             5  Female   31                  17                      40\n",
       "..          ...     ...  ...                 ...                     ...\n",
       "195         196  Female   35                 120                      79\n",
       "196         197  Female   45                 126                      28\n",
       "197         198    Male   32                 126                      74\n",
       "198         199    Male   32                 137                      18\n",
       "199         200    Male   30                 137                      83\n",
       "\n",
       "[200 rows x 5 columns]"
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#读取数据\n",
    "data=pd.read_csv('Mall_Customers.csv')\n",
    "data"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 特征选择与预处理"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Gender</th>\n",
       "      <th>Age</th>\n",
       "      <th>Annual Income (k$)</th>\n",
       "      <th>Spending Score (1-100)</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Male</td>\n",
       "      <td>19</td>\n",
       "      <td>15</td>\n",
       "      <td>39</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Male</td>\n",
       "      <td>21</td>\n",
       "      <td>15</td>\n",
       "      <td>81</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Female</td>\n",
       "      <td>20</td>\n",
       "      <td>16</td>\n",
       "      <td>6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Female</td>\n",
       "      <td>23</td>\n",
       "      <td>16</td>\n",
       "      <td>77</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Female</td>\n",
       "      <td>31</td>\n",
       "      <td>17</td>\n",
       "      <td>40</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>195</th>\n",
       "      <td>Female</td>\n",
       "      <td>35</td>\n",
       "      <td>120</td>\n",
       "      <td>79</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>196</th>\n",
       "      <td>Female</td>\n",
       "      <td>45</td>\n",
       "      <td>126</td>\n",
       "      <td>28</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>197</th>\n",
       "      <td>Male</td>\n",
       "      <td>32</td>\n",
       "      <td>126</td>\n",
       "      <td>74</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>198</th>\n",
       "      <td>Male</td>\n",
       "      <td>32</td>\n",
       "      <td>137</td>\n",
       "      <td>18</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>199</th>\n",
       "      <td>Male</td>\n",
       "      <td>30</td>\n",
       "      <td>137</td>\n",
       "      <td>83</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>200 rows × 4 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "     Gender  Age  Annual Income (k$)  Spending Score (1-100)\n",
       "0      Male   19                  15                      39\n",
       "1      Male   21                  15                      81\n",
       "2    Female   20                  16                       6\n",
       "3    Female   23                  16                      77\n",
       "4    Female   31                  17                      40\n",
       "..      ...  ...                 ...                     ...\n",
       "195  Female   35                 120                      79\n",
       "196  Female   45                 126                      28\n",
       "197    Male   32                 126                      74\n",
       "198    Male   32                 137                      18\n",
       "199    Male   30                 137                      83\n",
       "\n",
       "[200 rows x 4 columns]"
      ]
     },
     "execution_count": 26,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#CustomerID并不是聚类的时候的特征   是顾客的编号\n",
    "#选择其他的特征 组成训练数据集\n",
    "train_x=data[['Gender','Age','Annual Income (k$)','Spending Score (1-100)']] #这样得到了训练的数据集  只有那几个特征的dataframe\n",
    "train_x#其实我感觉 这一步 可以在读取数据的时候  把CustomerID那一列当索引 就行"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/opt/conda/lib/python3.7/site-packages/ipykernel_launcher.py:6: SettingWithCopyWarning: \n",
      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
      "Try using .loc[row_indexer,col_indexer] = value instead\n",
      "\n",
      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
      "  \n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "0      1\n",
       "1      1\n",
       "2      0\n",
       "3      0\n",
       "4      0\n",
       "      ..\n",
       "195    0\n",
       "196    0\n",
       "197    1\n",
       "198    1\n",
       "199    1\n",
       "Name: Gender, Length: 200, dtype: int64"
      ]
     },
     "execution_count": 27,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#这里的性别 Gender做一下 转为数字0,1   用LabelEncoder\n",
    "from sklearn.preprocessing import LabelEncoder\n",
    "#实例化一个转换器\n",
    "le=LabelEncoder()\n",
    "#然后用这个转换器去fit_transform数据集中的Gender\n",
    "train_x['Gender']=le.fit_transform(train_x['Gender'])\n",
    "#看一下转换完之后的特征\n",
    "train_x['Gender']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Gender</th>\n",
       "      <th>Age</th>\n",
       "      <th>Annual Income (k$)</th>\n",
       "      <th>Spending Score (1-100)</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>19</td>\n",
       "      <td>15</td>\n",
       "      <td>39</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>21</td>\n",
       "      <td>15</td>\n",
       "      <td>81</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>0</td>\n",
       "      <td>20</td>\n",
       "      <td>16</td>\n",
       "      <td>6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0</td>\n",
       "      <td>23</td>\n",
       "      <td>16</td>\n",
       "      <td>77</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0</td>\n",
       "      <td>31</td>\n",
       "      <td>17</td>\n",
       "      <td>40</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>195</th>\n",
       "      <td>0</td>\n",
       "      <td>35</td>\n",
       "      <td>120</td>\n",
       "      <td>79</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>196</th>\n",
       "      <td>0</td>\n",
       "      <td>45</td>\n",
       "      <td>126</td>\n",
       "      <td>28</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>197</th>\n",
       "      <td>1</td>\n",
       "      <td>32</td>\n",
       "      <td>126</td>\n",
       "      <td>74</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>198</th>\n",
       "      <td>1</td>\n",
       "      <td>32</td>\n",
       "      <td>137</td>\n",
       "      <td>18</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>199</th>\n",
       "      <td>1</td>\n",
       "      <td>30</td>\n",
       "      <td>137</td>\n",
       "      <td>83</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>200 rows × 4 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "     Gender  Age  Annual Income (k$)  Spending Score (1-100)\n",
       "0         1   19                  15                      39\n",
       "1         1   21                  15                      81\n",
       "2         0   20                  16                       6\n",
       "3         0   23                  16                      77\n",
       "4         0   31                  17                      40\n",
       "..      ...  ...                 ...                     ...\n",
       "195       0   35                 120                      79\n",
       "196       0   45                 126                      28\n",
       "197       1   32                 126                      74\n",
       "198       1   32                 137                      18\n",
       "199       1   30                 137                      83\n",
       "\n",
       "[200 rows x 4 columns]"
      ]
     },
     "execution_count": 28,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#看一下整体的数据\n",
    "train_x#这个时候已经把male转成了1  female转成了0"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 特征规范化"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[1.        , 0.01923077, 0.        , 0.3877551 ],\n",
       "       [1.        , 0.05769231, 0.        , 0.81632653],\n",
       "       [0.        , 0.03846154, 0.00819672, 0.05102041],\n",
       "       [0.        , 0.09615385, 0.00819672, 0.7755102 ],\n",
       "       [0.        , 0.25      , 0.01639344, 0.39795918],\n",
       "       [0.        , 0.07692308, 0.01639344, 0.76530612],\n",
       "       [0.        , 0.32692308, 0.02459016, 0.05102041],\n",
       "       [0.        , 0.09615385, 0.02459016, 0.94897959],\n",
       "       [1.        , 0.88461538, 0.03278689, 0.02040816],\n",
       "       [0.        , 0.23076923, 0.03278689, 0.7244898 ]])"
      ]
     },
     "execution_count": 29,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#然后把数据用[0-1]规范化一下   首先实例化一个MinMaxScaler\n",
    "min_max_scaler=preprocessing.MinMaxScaler()\n",
    "#然后用这个实力的转换器去fit_transform一下 数据\n",
    "train_x=min_max_scaler.fit_transform(train_x)\n",
    "#看一下转换之后的数据\n",
    "train_x[0:10]#这里只打印前10行吧 否则上传到github之后 很长"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [],
   "source": [
    "#将这个转成一个CSV文件存储起来\n",
    "pd.DataFrame(train_x).to_csv('temp.csv',index=False)#index=False表示 没有索引"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "      <th>2</th>\n",
       "      <th>3</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1.0</td>\n",
       "      <td>0.019231</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.387755</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1.0</td>\n",
       "      <td>0.057692</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.816327</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.038462</td>\n",
       "      <td>0.008197</td>\n",
       "      <td>0.051020</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.096154</td>\n",
       "      <td>0.008197</td>\n",
       "      <td>0.775510</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.250000</td>\n",
       "      <td>0.016393</td>\n",
       "      <td>0.397959</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>195</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.326923</td>\n",
       "      <td>0.860656</td>\n",
       "      <td>0.795918</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>196</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.519231</td>\n",
       "      <td>0.909836</td>\n",
       "      <td>0.275510</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>197</th>\n",
       "      <td>1.0</td>\n",
       "      <td>0.269231</td>\n",
       "      <td>0.909836</td>\n",
       "      <td>0.744898</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>198</th>\n",
       "      <td>1.0</td>\n",
       "      <td>0.269231</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.173469</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>199</th>\n",
       "      <td>1.0</td>\n",
       "      <td>0.230769</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.836735</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>200 rows × 4 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "       0         1         2         3\n",
       "0    1.0  0.019231  0.000000  0.387755\n",
       "1    1.0  0.057692  0.000000  0.816327\n",
       "2    0.0  0.038462  0.008197  0.051020\n",
       "3    0.0  0.096154  0.008197  0.775510\n",
       "4    0.0  0.250000  0.016393  0.397959\n",
       "..   ...       ...       ...       ...\n",
       "195  0.0  0.326923  0.860656  0.795918\n",
       "196  0.0  0.519231  0.909836  0.275510\n",
       "197  1.0  0.269231  0.909836  0.744898\n",
       "198  1.0  0.269231  1.000000  0.173469\n",
       "199  1.0  0.230769  1.000000  0.836735\n",
       "\n",
       "[200 rows x 4 columns]"
      ]
     },
     "execution_count": 31,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#再把那个temp读取进来看看\n",
    "temp_data=pd.read_csv('temp.csv')\n",
    "temp_data#这个就是转换好的数据  只是没有索引"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 模型运用"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([1, 1, 0, 0, 0, 0, 0, 0, 2, 0, 2, 0, 0, 0, 2, 1, 0, 1, 2, 0, 2, 1,\n",
       "       0, 1, 0, 1, 0, 1, 0, 0, 2, 0, 2, 1, 0, 0, 0, 0, 0, 0, 0, 1, 2, 0,\n",
       "       0, 0, 0, 0, 0, 0, 0, 1, 0, 2, 0, 2, 0, 2, 0, 2, 2, 1, 0, 0, 2, 1,\n",
       "       0, 0, 1, 0, 2, 0, 0, 0, 2, 1, 0, 2, 0, 0, 2, 1, 2, 0, 0, 2, 0, 0,\n",
       "       0, 0, 0, 1, 2, 0, 0, 1, 0, 0, 2, 1, 0, 0, 2, 1, 2, 0, 0, 2, 2, 2,\n",
       "       2, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 2, 1, 2, 1, 2, 1,\n",
       "       0, 0, 2, 0, 0, 1, 2, 0, 0, 1, 0, 0, 2, 1, 2, 0, 0, 1, 2, 1, 0, 0,\n",
       "       0, 0, 2, 0, 2, 0, 0, 0, 2, 0, 2, 0, 2, 0, 0, 1, 2, 1, 2, 1, 0, 0,\n",
       "       2, 1, 2, 1, 0, 0, 2, 0, 0, 1, 0, 1, 0, 0, 0, 0, 2, 0, 0, 0, 0, 1,\n",
       "       2, 1], dtype=int32)"
      ]
     },
     "execution_count": 32,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#开始使用k-means聚类\n",
    "#实例一个k-means模型\n",
    "kmeans=KMeans(n_clusters=3)\n",
    "predict_y=kmeans.fit_predict(train_x)\n",
    "predict_y"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>CustomerID</th>\n",
       "      <th>Gender</th>\n",
       "      <th>Age</th>\n",
       "      <th>Annual Income (k$)</th>\n",
       "      <th>Spending Score (1-100)</th>\n",
       "      <th>0</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>Male</td>\n",
       "      <td>19</td>\n",
       "      <td>15</td>\n",
       "      <td>39</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>Male</td>\n",
       "      <td>21</td>\n",
       "      <td>15</td>\n",
       "      <td>81</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>Female</td>\n",
       "      <td>20</td>\n",
       "      <td>16</td>\n",
       "      <td>6</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>4</td>\n",
       "      <td>Female</td>\n",
       "      <td>23</td>\n",
       "      <td>16</td>\n",
       "      <td>77</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>5</td>\n",
       "      <td>Female</td>\n",
       "      <td>31</td>\n",
       "      <td>17</td>\n",
       "      <td>40</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>195</th>\n",
       "      <td>196</td>\n",
       "      <td>Female</td>\n",
       "      <td>35</td>\n",
       "      <td>120</td>\n",
       "      <td>79</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>196</th>\n",
       "      <td>197</td>\n",
       "      <td>Female</td>\n",
       "      <td>45</td>\n",
       "      <td>126</td>\n",
       "      <td>28</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>197</th>\n",
       "      <td>198</td>\n",
       "      <td>Male</td>\n",
       "      <td>32</td>\n",
       "      <td>126</td>\n",
       "      <td>74</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>198</th>\n",
       "      <td>199</td>\n",
       "      <td>Male</td>\n",
       "      <td>32</td>\n",
       "      <td>137</td>\n",
       "      <td>18</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>199</th>\n",
       "      <td>200</td>\n",
       "      <td>Male</td>\n",
       "      <td>30</td>\n",
       "      <td>137</td>\n",
       "      <td>83</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>200 rows × 6 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "     CustomerID  Gender  Age  Annual Income (k$)  Spending Score (1-100)  0\n",
       "0             1    Male   19                  15                      39  1\n",
       "1             2    Male   21                  15                      81  1\n",
       "2             3  Female   20                  16                       6  0\n",
       "3             4  Female   23                  16                      77  0\n",
       "4             5  Female   31                  17                      40  0\n",
       "..          ...     ...  ...                 ...                     ... ..\n",
       "195         196  Female   35                 120                      79  0\n",
       "196         197  Female   45                 126                      28  0\n",
       "197         198    Male   32                 126                      74  1\n",
       "198         199    Male   32                 137                      18  2\n",
       "199         200    Male   30                 137                      83  1\n",
       "\n",
       "[200 rows x 6 columns]"
      ]
     },
     "execution_count": 33,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#把聚类好的标签跟最初的数据 联接  形成一个文件 看看效果\n",
    "result=pd.concat( (data,pd.DataFrame(predict_y) )    ,axis=1)\n",
    "result"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>CustomerID</th>\n",
       "      <th>Gender</th>\n",
       "      <th>Age</th>\n",
       "      <th>Annual Income (k$)</th>\n",
       "      <th>Spending Score (1-100)</th>\n",
       "      <th>聚类结果</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>Male</td>\n",
       "      <td>19</td>\n",
       "      <td>15</td>\n",
       "      <td>39</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>Male</td>\n",
       "      <td>21</td>\n",
       "      <td>15</td>\n",
       "      <td>81</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>Female</td>\n",
       "      <td>20</td>\n",
       "      <td>16</td>\n",
       "      <td>6</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>4</td>\n",
       "      <td>Female</td>\n",
       "      <td>23</td>\n",
       "      <td>16</td>\n",
       "      <td>77</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>5</td>\n",
       "      <td>Female</td>\n",
       "      <td>31</td>\n",
       "      <td>17</td>\n",
       "      <td>40</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>195</th>\n",
       "      <td>196</td>\n",
       "      <td>Female</td>\n",
       "      <td>35</td>\n",
       "      <td>120</td>\n",
       "      <td>79</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>196</th>\n",
       "      <td>197</td>\n",
       "      <td>Female</td>\n",
       "      <td>45</td>\n",
       "      <td>126</td>\n",
       "      <td>28</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>197</th>\n",
       "      <td>198</td>\n",
       "      <td>Male</td>\n",
       "      <td>32</td>\n",
       "      <td>126</td>\n",
       "      <td>74</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>198</th>\n",
       "      <td>199</td>\n",
       "      <td>Male</td>\n",
       "      <td>32</td>\n",
       "      <td>137</td>\n",
       "      <td>18</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>199</th>\n",
       "      <td>200</td>\n",
       "      <td>Male</td>\n",
       "      <td>30</td>\n",
       "      <td>137</td>\n",
       "      <td>83</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>200 rows × 6 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "     CustomerID  Gender  Age  Annual Income (k$)  Spending Score (1-100)  聚类结果\n",
       "0             1    Male   19                  15                      39     1\n",
       "1             2    Male   21                  15                      81     1\n",
       "2             3  Female   20                  16                       6     0\n",
       "3             4  Female   23                  16                      77     0\n",
       "4             5  Female   31                  17                      40     0\n",
       "..          ...     ...  ...                 ...                     ...   ...\n",
       "195         196  Female   35                 120                      79     0\n",
       "196         197  Female   45                 126                      28     0\n",
       "197         198    Male   32                 126                      74     1\n",
       "198         199    Male   32                 137                      18     2\n",
       "199         200    Male   30                 137                      83     1\n",
       "\n",
       "[200 rows x 6 columns]"
      ]
     },
     "execution_count": 34,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#最后一列更名\n",
    "result.rename({0:u'聚类结果'},axis=1,inplace=True)\n",
    "result"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
